// Adapted from https://raw.githubusercontent.com/google/safehtml/3c4cd5b5d8c9a6c5882fba099979e9f50b65c876/style.go

// Copyright (c) 2017 The Go Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

package safehtml

import (
	"bytes"
	"fmt"
	"net/url"
	"regexp"
	"strings"
)

// SanitizeCSS sanitizes a single CSS declaration and returns the property name
// and value to use in its place.
//
// The property name is passed through SanitizeCSSProperty first. If that yields
// InnocuousPropertyName, the value is not inspected and the innocuous name and
// value placeholders are returned together. Otherwise the sanitized name is
// returned along with the result of SanitizeCSSValue for that name.
func SanitizeCSS(property, value string) (string, string) {
	name := SanitizeCSSProperty(property)
	if name != InnocuousPropertyName {
		return name, SanitizeCSSValue(name, value)
	}
	return InnocuousPropertyName, InnocuousPropertyValue
}

// SanitizeCSSValue sanitizes value for the given property name.
//
// The property is looked up exactly as given in
// cssPropertyNameToValueSanitizer; when no dedicated sanitizer is registered
// under that name, sanitizeRegular is applied instead.
func SanitizeCSSValue(property, value string) string {
	sanitize, found := cssPropertyNameToValueSanitizer[property]
	if !found {
		sanitize = sanitizeRegular
	}
	return sanitize(value)
}

// SanitizeCSSProperty returns the lowercased property name when it matches
// identifierPattern, and InnocuousPropertyName for anything else.
func SanitizeCSSProperty(property string) string {
	if identifierPattern.MatchString(property) {
		return strings.ToLower(property)
	}
	return InnocuousPropertyName
}

// identifierPattern matches a subset of valid <ident-token> values defined in
// https://www.w3.org/TR/css-syntax-3/#ident-token-diagram: a non-empty string made up only of
// ASCII letters and hyphens. SanitizeCSSProperty uses it to decide whether a property name is
// acceptable. The pattern also matches all generic family name
// keywords defined in https://drafts.csswg.org/css-fonts-3/#family-name-value.
var identifierPattern = regexp.MustCompile(`^[-a-zA-Z]+$`)

// cssPropertyNameToValueSanitizer maps a lowercase CSS property name to the
// function used to sanitize values of that property. SanitizeCSSValue consults
// this table and falls back to sanitizeRegular for names that are not listed.
var cssPropertyNameToValueSanitizer = map[string]func(string) string{
	// Properties with a dedicated sanitizer.
	"background-image":    sanitizeBackgroundImage,
	"font-family":         sanitizeFontFamily,
	"display":             sanitizeEnum,
	// Properties explicitly routed to the regular value sanitizer.
	"background-color":    sanitizeRegular,
	"background-position": sanitizeRegular,
	"background-repeat":   sanitizeRegular,
	"background-size":     sanitizeRegular,
	"color":               sanitizeRegular,
	"height":              sanitizeRegular,
	"width":               sanitizeRegular,
	"left":                sanitizeRegular,
	"right":               sanitizeRegular,
	"top":                 sanitizeRegular,
	"bottom":              sanitizeRegular,
	"font-weight":         sanitizeRegular,
	"padding":             sanitizeRegular,
	"z-index":             sanitizeRegular,
}

// validURLPrefixes lists the accepted openings of a CSS url() value. Each entry
// is paired, by index, with the closing sequence at the same position in
// validURLSuffixes. sanitizeBackgroundImage tries the entries in order, so the
// double- and single-quoted forms are checked before the bare `url(` form.
var validURLPrefixes = []string{
	`url("`,
	`url('`,
	`url(`,
}

// validURLSuffixes lists the closing sequences that correspond, index for
// index, to the openings in validURLPrefixes.
var validURLSuffixes = []string{
	`")`,
	`')`,
	`)`,
}

// sanitizeBackgroundImage validates a background-image value consisting of one
// or more comma-separated url(...) items. It returns v unchanged when every
// item is acceptable and InnocuousPropertyValue otherwise.
//
// An item is acceptable when, after surrounding whitespace is trimmed:
//   - it has one of the forms url("..."), url('...') or url(...), where the
//     opening and closing sequences do not overlap (so `url(")` is rejected);
//   - the text between them contains none of the characters " ' ( ) \ ; and
//   - urlIsSafe accepts that text.
//
// The character restriction stops the contents from terminating the url()
// early and appending further CSS (for example `url("a/");color:red;x:url("b")`)
// and from using CSS backslash escapes to hide a scheme from the URL parser.
// URLs that legitimately contain such characters must be percent-encoded.
func sanitizeBackgroundImage(v string) string {
	// Characters that could end or escape out of a url() token or CSS string.
	const unsafeURLChars = "\"'()\\"

	// Check for <> as per https://github.com/google/safehtml/blob/be23134998433fcf0135dda53593fc8f8bf4df7c/style.go#L87C2-L89C3
	if strings.ContainsAny(v, "<>") {
		return InnocuousPropertyValue
	}
	for _, item := range strings.Split(v, ",") {
		item = strings.TrimSpace(item)
		found := false
		for i, prefix := range validURLPrefixes {
			suffix := validURLSuffixes[i]
			// The opening and closing sequences must not share characters,
			// otherwise inputs such as `url(")` would match the quoted form.
			if len(item) < len(prefix)+len(suffix) {
				continue
			}
			if strings.HasPrefix(item, prefix) && strings.HasSuffix(item, suffix) {
				item = item[len(prefix) : len(item)-len(suffix)]
				found = true
				break
			}
		}
		if !found || strings.ContainsAny(item, unsafeURLChars) || !urlIsSafe(item) {
			return InnocuousPropertyValue
		}
	}
	return v
}

// urlIsSafe reports whether s is acceptable as a URL. A string that fails to
// parse is rejected. A URL without a scheme is accepted. A URL with a scheme is
// accepted only when the scheme is http, https or mailto, compared
// case-insensitively.
func urlIsSafe(s string) bool {
	parsed, err := url.Parse(s)
	switch {
	case err != nil:
		return false
	case !parsed.IsAbs():
		return true
	}
	for _, allowed := range [...]string{"http", "https", "mailto"} {
		if strings.EqualFold(parsed.Scheme, allowed) {
			return true
		}
	}
	return false
}

// genericFontFamilyName matches an unquoted font family name as accepted by
// sanitizeFontFamily: an ASCII letter followed by one or more ASCII letters,
// spaces or hyphens.
var genericFontFamilyName = regexp.MustCompile(`^[a-zA-Z][- a-zA-Z]+$`)

// sanitizeFontFamily validates a comma-separated font-family list. It returns s
// unchanged when every entry is acceptable and InnocuousPropertyValue
// otherwise.
//
// After surrounding whitespace is trimmed, an entry is acceptable when it is
// either:
//   - a double-quoted name: it starts and ends with distinct `"` characters
//     and the text between them contains no `"`, `\`, `<`, `>` or ASCII control
//     characters; or
//   - an unquoted name matching genericFontFamilyName.
//
// Checking the inside of quoted names prevents a value such as
// `"a";color:red;x:"b"` from closing the string early and appending further
// CSS, and prevents `<`/`>` from being used to break out of a style element.
func sanitizeFontFamily(s string) string {
	for _, f := range strings.Split(s, ",") {
		f = strings.TrimSpace(f)
		if strings.HasPrefix(f, `"`) {
			// Require separate opening and closing quotes; a lone `"` would
			// otherwise satisfy both the prefix and the suffix check.
			if len(f) < 2 || !strings.HasSuffix(f, `"`) {
				return InnocuousPropertyValue
			}
			bad := strings.IndexFunc(f[1:len(f)-1], func(r rune) bool {
				return r == '"' || r == '\\' || r == '<' || r == '>' || r < 0x20 || r == 0x7F
			})
			if bad >= 0 {
				return InnocuousPropertyValue
			}
			continue
		}
		if !genericFontFamilyName.MatchString(f) {
			return InnocuousPropertyValue
		}
	}
	return s
}

// sanitizeEnum returns s unchanged when it matches
// safeEnumPropertyValuePattern, and InnocuousPropertyValue otherwise.
func sanitizeEnum(s string) string {
	if safeEnumPropertyValuePattern.MatchString(s) {
		return s
	}
	return InnocuousPropertyValue
}

// sanitizeRegular returns s unchanged when it matches
// safeRegularPropertyValuePattern, and InnocuousPropertyValue otherwise.
func sanitizeRegular(s string) string {
	if safeRegularPropertyValuePattern.MatchString(s) {
		return s
	}
	return InnocuousPropertyValue
}

// InnocuousPropertyName is the placeholder property name returned by the sanitizers in place of
// a property name that was rejected as unsafe.
const InnocuousPropertyName = "zTemplUnsafeCSSPropertyName"

// InnocuousPropertyValue is the placeholder property value returned by the sanitizers in place of
// a property value that was rejected as unsafe.
const InnocuousPropertyValue = "zTemplUnsafeCSSPropertyValue"

// safeRegularPropertyValuePattern matches strings that are safe to use as property values.
// Specifically, it matches strings where every rune is either a safe rune (i.e. an ASCII
// alphanumeric or one of '+', '-', '.', '!', '#', '%', '_', ' ', '\t'), or a '*' or '/' that is
// followed by end-of-text or a safe rune. This regex ensures that the following
// are disallowed:
//   - "/*" and "*/", which are CSS comment markers.
//   - "//", even though this is not a comment marker in the CSS specification. Disallowing
//     this string minimizes the chance that browser peculiarities or parsing bugs will allow
//     sanitization to be bypassed.
//   - '(' and ')', which can be used to call functions.
//   - ',', since it can be used to inject extra values into a property.
//   - Runes which could be matched on CSS error recovery of a previously malformed token, such as '@'
//     and ':'. See http://www.w3.org/TR/css3-syntax/#error-handling.
//
// The '-' inside the character class is escaped on purpose: written as `+-.` it would form the
// range U+002B..U+002E, which silently includes ','.
var safeRegularPropertyValuePattern = regexp.MustCompile(`^(?:[*/]?(?:[0-9a-zA-Z+\-.!#%_ \t]|$))*$`)

// safeEnumPropertyValuePattern matches strings that are safe to use as enumerated property values.
// Specifically, it matches strings that contain only ASCII alphabetic and '-' runes. Because the
// pattern uses '*', the empty string matches as well.
var safeEnumPropertyValuePattern = regexp.MustCompile(`^[a-zA-Z-]*$`)

// SanitizeStyleValue returns s escaped so that it can be placed between double
// quotes to form a CSS <string-token>.
// See syntax at https://www.w3.org/TR/css-syntax-3/#string-token-diagram.
//
// Beyond the escapes a <string-token> strictly requires, '<' and control runes
// are escaped too, to reduce the risk of browser-specific bugs and of breaking
// out of a style element. Escaped runes are written as a backslash followed by
// six uppercase hexadecimal digits; U+0000 is replaced by U+FFFD.
// Taken from cssEscapeString in safehtml package.
func SanitizeStyleValue(s string) string {
	var out bytes.Buffer
	out.Grow(len(s))
	for _, r := range s {
		if r == 0x0000 {
			// Replace NULL as described in
			// https://www.w3.org/TR/css-syntax-3/#input-preprocessing, in case
			// the user agent fails to handle it properly.
			out.WriteString("\uFFFD")
			continue
		}
		mustEscape := r == '<' || // would allow `</style>` to close a style element
			r == '"' || r == '\\' || // must be CSS-escaped inside a <string-token>
			r <= 0x001F || r == 0x007F || // C0 control codes (includes line feed)
			(r >= 0x0080 && r <= 0x009F) || // C1 control codes
			r == 0x2028 || r == 0x2029 // Unicode newline characters
		if mustEscape {
			// See CSS escape sequence syntax at https://www.w3.org/TR/css-syntax-3/#escape-diagram.
			fmt.Fprintf(&out, "\\%06X", r)
		} else {
			out.WriteRune(r)
		}
	}
	return out.String()
}
